In [1]:
# Data downloaded from: http://berkeleyearth.lbl.gov/auto/Global/Gridded/Complete_TAVG_EqualArea.nc
!md5sum /tmp/Complete_TAVG_EqualArea.nc


276321d42c9c86c47808129d0bf0b843  /tmp/Complete_TAVG_EqualArea.nc

In [2]:
%pylab inline
from netCDF4 import Dataset
from numpy import array, size, average
from scipy.stats import nanmean

def running_average(temp, n=5, m=6, unc=False):
    """
    Calculates centered running average from -n to +m points (total of n+m+1 points).

    Example 1: if n=m=6 and the data is monthly, then the result is a centered 13 month running average.

    Example 2: if n=5, m=6 and the data is monthly, then the result is a "centered" 12 month moving average
                (another way to center it is with n=6, m=5).

    unc ... if True, calculate the average of uncertainties
    """
    avg = temp.copy()
    for i in range(n, size(temp)-m):
        window = temp[i-n:i+m+1]
        if unc:
            avg[i] = sqrt(sum(window**2))/len(window)
        else:
            avg[i] = average(window)
    avg[:n] = NaN
    avg[-m:] = NaN
    return avg

def month_to_year(temp, unc=False):
    return running_average(temp, 5, 6, unc) # 5+6+1 = 12 months

def month_to_10years(temp, unc=False):
    return running_average(temp, 59, 60, unc) #59+60+1 = 120 months

def month_to_20years(temp, unc=False):
    return running_average(temp, 119, 120, unc) #119+120+1 = 240 months


Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.zmq.pylab.backend_inline].
For more information, type 'help(pylab)'.

In [3]:
rootgrp = Dataset('/tmp/Complete_TAVG_EqualArea.nc')
rootgrp.variables


Out[3]:
OrderedDict([(u'longitude', <netCDF4.Variable object at 0x2c62950>), (u'latitude', <netCDF4.Variable object at 0x2c629d0>), (u'time', <netCDF4.Variable object at 0x2c62a50>), (u'month_number', <netCDF4.Variable object at 0x2c62ad0>), (u'land_mask', <netCDF4.Variable object at 0x2c62b50>), (u'temperature', <netCDF4.Variable object at 0x2c62bd0>), (u'longitude2', <netCDF4.Variable object at 0x2c62c50>)])

In [4]:
longitude = rootgrp.variables["longitude"][:]
latitude = rootgrp.variables["latitude"][:]
time = rootgrp.variables["time"][:]
month_number = rootgrp.variables["month_number"][:]
land_mask = rootgrp.variables["land_mask"][:]
temperature = rootgrp.variables["temperature"][:]
longitude2 = rootgrp.variables["longitude2"][:]

In [5]:
Tavg = nanmean(temperature, axis=1)
T0 = 8.79


/home/ondrej/repos/python-hpcmp2/opt/profile/gwlj/lib/python2.7/site-packages/scipy/stats/stats.py:346: RuntimeWarning: invalid value encountered in true_divide
  return np.mean(x,axis)/factor

In [6]:
Tk = temperature+273.15+T0
r = 4
Tavg4 = nanmean(Tk**r, axis=1)**(1./r)-273.15

In [7]:
Tavg+T0


Out[7]:
array([ 9.36376349,  9.77344344,  9.6731016 , ...,  9.91981582,
        9.82924799,  9.7236465 ])

In [8]:
Tavg4


Out[8]:
array([ 9.37470448,  9.78570963,  9.67893399, ...,  9.927468  ,
        9.84658341,  9.75064914])

In [9]:
figure(figsize=(8, 6))
plot(time, Tavg + T0, "k-", label="Tavg")
plot(time, Tavg4, "b-", label="Tavg4")
xlabel("Year")
ylabel("Monthly temperature [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
grid()
legend();



In [10]:
figure(figsize=(8, 6))
plot(time, month_to_year(Tavg)+T0, "k-")
plot(time, month_to_year(Tavg4), "b-")
xlabel("Year")
ylabel("Annual temperature [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
ylim([6.5, 10.5])
grid()



In [11]:
figure(figsize=(8, 6))
plot(time, month_to_10years(Tavg)+T0, "k-")
plot(time, month_to_10years(Tavg4), "b-")
xlabel("Year")
ylabel("Decadal temperature [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
ylim([7.5, 10])
grid()


Let's compare against the original Full_TAVG_complete.txt file:


In [12]:
D = loadtxt("data/Full_TAVG_complete.txt", comments="%")
# Read from file:
T0 = 8.79
# Construct floating point years using years + months
years_int = D[:, 0]
months = D[:, 1]
years = years_int + (months-0.5) / 12
temp_month = D[:, 2]
unc_month = D[:, 3]

Check that the years exactly agree:


In [13]:
max(years-time)


Out[13]:
2.2737367544323206e-13

In [14]:
figure(figsize=(8, 6))
plot(time, temp_month + T0, "b-", label="Full_TAVG_complete.txt")
plot(time, Tavg + T0, "k-", label="our")
xlabel("Year")
ylabel("Monthly temperature [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
grid()
legend();


Let's plot the errors:


In [15]:
figure(figsize=(8, 6))
semilogy(time, abs(temp_month-Tavg), "k-")
xlabel("Year")
ylabel("Monthly temperature differences [$^\circ C$]")
title("Berkeley Averaging method")
xlim([1750, 2015])
grid();


-c:2: RuntimeWarning: invalid value encountered in absolute

As can be seen, we are able to get similar temperatures, typically 1C off around the year 1800, and 0.1C off from 1850 and later.